/*
 *  token lex for finsh shell.
 *
 * COPYRIGHT (C) 2006 - 2013, RT-Thread Development Team
 *
 *  This file is part of RT-Thread (http://www.rt-thread.org)
 *  Maintainer: bernard.xiong <bernard.xiong at gmail.com>
 *
 *  All rights reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Change Logs:
 * Date           Author       Notes
 * 2010-03-22     Bernard      first version
 * 2013-04-03     Bernard      strip more characters.
 */
#include <finsh.h>
#include <stdlib.h>

#include "finsh_token.h"
#include "finsh_error.h"

/*
 * Character classification helpers (ASCII only).
 *
 * Every expansion is wrapped in parentheses so that it can be combined with
 * other operators (`!is_alpha(c)`, `is_alpha(c) == 0`, ...) without
 * precedence surprises. Arguments may be evaluated more than once, so do
 * not pass expressions with side effects.
 *
 * is_alpha / is_xdigit fold the character to lower case with `| 0x20` and
 * rely on the unsigned comparison to reject anything below 'a'.
 */
#define is_alpha(ch)    ((((ch) | 0x20) - 'a') < 26u)
#define is_digit(ch)    ((ch) >= '0' && (ch) <= '9')
#define is_xdigit(ch)   (((ch) >= '0' && (ch) <= '9') || ((((ch) | 0x20) - 'a') < 6u))
/* a separator is any character that cannot be part of an identifier */
#define is_separator(ch) (!(((ch) >= 'a' && (ch) <= 'z') \
     || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_')))
#define is_eof(self) ((self)->eof)

/* One reserved word together with the token type the lexer reports for it. */
struct name_table {
	const char* name;               /* spelling of the word; points at a string literal */
	enum finsh_token_type type;     /* token type produced when the word is matched */
};

/*
 * Reserved words. An identifier whose spelling matches an entry exactly
 * (case-sensitive) is reported with the entry's token type instead of
 * finsh_token_type_identifier.
 */
static const struct name_table finsh_name_table[] = {
	/* type keywords */
	{"void",     finsh_token_type_void},
	{"char",     finsh_token_type_char},
	{"short",    finsh_token_type_short},
	{"int",      finsh_token_type_int},
	{"long",     finsh_token_type_long},
	{"unsigned", finsh_token_type_unsigned},

	/* null value, accepted in either spelling */
	{"NULL",     finsh_token_type_value_null},
	{"null",     finsh_token_type_value_null}
};

/* cursor movement over the input line */
static char token_next_char(struct finsh_token* self);
static void token_prev_char(struct finsh_token* self);
static void token_trim_space(struct finsh_token* self);

/* token recognition */
static void token_run(struct finsh_token* self);
static int  token_match_name(struct finsh_token* self, const char* str);

/* literal parsing */
static void token_proc_number(struct finsh_token* self);
static long token_spec_number(char* string, int length, int b);
static char token_proc_char(struct finsh_token* self);
static uint8_t* token_proc_string(struct finsh_token* self);
static int token_proc_escape(struct finsh_token* self);

/*
 * Prepare a lexer for a new line of input.
 *
 * All fields of *self are cleared to zero and `line` is remembered as the
 * text to scan. Only the pointer is stored; the text itself is not copied.
 */
void finsh_token_init(struct finsh_token* self, uint8_t* line)
{
	memset(self, 0, sizeof(*self));
	self->line = line;
}

/*
 * Return the type of the current token.
 *
 * If the replay flag is set, the flag is cleared and the token scanned by
 * the previous call is reported again; otherwise the next token is scanned
 * from the line first.
 */
enum finsh_token_type finsh_token_token(struct finsh_token* self)
{
	if(!self->replay) {
		token_run(self);
	} else {
		self->replay = 0;
	}

	return (enum finsh_token_type)self->current_token;
}

/*
 * Copy the text of the current token into `token`.
 *
 * At most FINSH_NAME_MAX bytes are written. strncpy() does not append a
 * terminating '\0' when the source is FINSH_NAME_MAX bytes or longer.
 * NOTE(review): callers presumably pre-terminate or over-size their
 * buffer -- confirm at the call sites.
 */
void finsh_token_get_token(struct finsh_token* self, uint8_t* token)
{
	const char* text = (const char*)self->string;

	strncpy((char*)token, text, FINSH_NAME_MAX);
}

int token_get_string(struct finsh_token* self, uint8_t* str)
{
	unsigned char* p = str;
	char ch;

	ch = token_next_char(self);

	if(is_eof(self)) return -1;

	str[0] = '\0';

	if(is_digit(ch)) { /*the first character of identifier is not a digit.*/
		token_prev_char(self);
		return -1;
	}

	while(!is_separator(ch) && !is_eof(self)) {
		*p++ = ch;

		ch = token_next_char(self);
	}

	self->eof = 0;

	token_prev_char(self);
	*p = '\0';

	return 0;
}

/*
 * Get the next character of the line and advance the cursor.
 *
 * At the end of the line (terminating '\0' or a '\n') the eof flag is set,
 * position is reset to 0 and '\0' is returned; once eof is set every further
 * call returns '\0' without touching the cursor.
 *
 * The current byte is tested for '\0' directly rather than comparing
 * position with strlen(line), which rescanned the whole line on every call.
 */
static char token_next_char(struct finsh_token* self)
{
	char ch;

	if(self->eof) return '\0';

	ch = (char)self->line[self->position];

	if(ch == '\0' || ch == '\n') {
		self->eof = 1;
		self->position = 0;
		return '\0';
	}

	self->position++;
	return ch;
}

/*
 * Step the cursor back by one character so the last character read is
 * returned again. Does nothing once eof is set or when the cursor is
 * already at the start of the line.
 */
static void token_prev_char(struct finsh_token* self)
{
	if(!self->eof && self->position != 0) {
		self->position--;
	}
}

static void token_run(struct finsh_token* self)
{
	char ch;

	token_trim_space(self); /* first trim space and tab. */
	token_get_string(self, &(self->string[0]));

	if(is_eof(self)) {  /*if it is eof, break;*/
		self->current_token = finsh_token_type_eof;
		return ;
	}

	if(self->string[0] != '\0') { /*It is a key word or a identifier.*/
		if(!token_match_name(self, (char*)self->string)) {
			self->current_token = finsh_token_type_identifier;
		}
	} else { /*It is a operator character.*/
		ch = token_next_char(self);

		switch(ch) {
			case '(':
				self->current_token = finsh_token_type_left_paren;
				break;

			case ')':
				self->current_token = finsh_token_type_right_paren;
				break;

			case ',':
				self->current_token = finsh_token_type_comma;
				break;

			case ';':
				self->current_token = finsh_token_type_semicolon;
				break;

			case '&':
				self->current_token = finsh_token_type_and;
				break;

			case '*':
				self->current_token = finsh_token_type_mul;
				break;

			case '+':
				ch = token_next_char(self);

				if(ch == '+') {
					self->current_token = finsh_token_type_inc;
				} else {
					token_prev_char(self);
					self->current_token = finsh_token_type_add;
				}

				break;

			case '-':
				ch = token_next_char(self);

				if(ch == '-') {
					self->current_token = finsh_token_type_dec;
				} else {
					token_prev_char(self);
					self->current_token = finsh_token_type_sub;
				}

				break;

			case '/':
				ch = token_next_char(self);

				if(ch == '/') {
					/* line comments, set to end of file */
					self->current_token = finsh_token_type_eof;
				} else {
					token_prev_char(self);
					self->current_token = finsh_token_type_div;
				}

				break;

			case '<':
				ch = token_next_char(self);

				if(ch == '<') {
					self->current_token = finsh_token_type_shl;
				} else {
					token_prev_char(self);
					self->current_token = finsh_token_type_bad;
				}

				break;

			case '>':
				ch = token_next_char(self);

				if(ch == '>') {
					self->current_token = finsh_token_type_shr;
				} else {
					token_prev_char(self);
					self->current_token = finsh_token_type_bad;
				}

				break;

			case '|':
				self->current_token = finsh_token_type_or;
				break;

			case '%':
				self->current_token = finsh_token_type_mod;
				break;

			case '~':
				self->current_token = finsh_token_type_bitwise;
				break;

			case '^':
				self->current_token = finsh_token_type_xor;
				break;

			case '=':
				self->current_token = finsh_token_type_assign;
				break;

			case '\'':
				self->value.char_value = token_proc_char(self);
				self->current_token = finsh_token_type_value_char;
				break;

			case '"':
				token_proc_string(self);
				self->current_token = finsh_token_type_value_string;
				break;

			default:
				if(is_digit(ch)) {
					token_prev_char(self);
					token_proc_number(self);
					break;
				}

				finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
				self->current_token = finsh_token_type_bad;

				break;
		}
	}
}

static int token_match_name(struct finsh_token* self, const char* str)
{
	int i;

	for(i = 0; i < sizeof(finsh_name_table) / sizeof(struct name_table); i++) {
		if(strcmp(finsh_name_table[i].name, str) == 0) {
			self->current_token = finsh_name_table[i].type;
			return 1;
		}
	}

	return 0;
}

/*
 * Skip spaces, tabs and carriage returns. The first character that is not
 * one of those is pushed back so the next read returns it.
 */
static void token_trim_space(struct finsh_token* self)
{
	char ch;

	do {
		ch = token_next_char(self);
	} while(ch == ' ' || ch == '\t' || ch == '\r');

	token_prev_char(self);
}

/*
 * Parse the body of a character literal; the opening quote has already been
 * consumed. Returns the character value.
 *
 * Recognised escapes: \n \t \v \b \r \\ \' and a backslash followed by up to
 * three digits. Any other escape yields 0 and leaves the offending character
 * unread. If the closing quote is missing, FINSH_ERROR_EXPECT_CHAR is set
 * and the value parsed so far is still returned.
 *
 * The digits of a numeric escape are read as a decimal number, which is what
 * the previous atoi()-based code attempted (it stored the wrong characters
 * and converted an empty string, so it always produced 0).
 * NOTE(review): C itself and token_proc_escape() treat such digits as
 * octal -- confirm which base is wanted here.
 */
static char token_proc_char(struct finsh_token* self)
{
	char ch;

	ch = token_next_char(self);

	if(ch == '\\') {
		ch = token_next_char(self);

		switch(ch) {
			case 'n':
				ch = '\n';
				break;

			case 't':
				ch = '\t';
				break;

			case 'v':
				ch = '\v';
				break;

			case 'b':
				ch = '\b';
				break;

			case 'r':
				ch = '\r';
				break;

			case '\\':
				ch = '\\';
				break;

			case '\'':
				ch = '\'';
				break;

			default : { /* numeric escape such as '\113' */
				int value = 0;
				int digits = 0;

				/* accumulate the current digit first, then advance */
				while(is_digit(ch) && digits < 3) {
					value = value * 10 + (ch - '0');
					digits++;
					ch = token_next_char(self);
				}

				/* un-read the first character that is not part of the number */
				token_prev_char(self);
				ch = (char)value;
				break;
			}
		}
	}

	if(token_next_char(self) != '\'') {
		token_prev_char(self);
		finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
	}

	return ch;
}

/*
 * Parse the body of a string literal into self->string; the opening quote
 * has already been consumed.
 *
 * Returns self->string when the closing quote is found. Returns NULL when
 * the line ends first (FINSH_ERROR_UNEXPECT_END is set) or when the literal
 * does not fit in FINSH_STRING_MAX bytes including the terminator. In every
 * case self->string is left NUL-terminated, because the caller reports a
 * string token without looking at the return value.
 */
static uint8_t* token_proc_string(struct finsh_token* self)
{
	uint8_t* p = &self->string[0];

	while(p - &(self->string[0]) < FINSH_STRING_MAX) {
		char ch = token_next_char(self);

		if(is_eof(self)) {
			*p = '\0'; /* keep what was read so far as a valid C string */
			finsh_error_set(FINSH_ERROR_UNEXPECT_END);
			return NULL;
		}

		if(ch == '\\') {
			ch = token_proc_escape(self);
		} else if(ch == '"') { /* end of string */
			*p = '\0';
			return self->string;
		}

		*p++ = ch;
	}

	/*
	 * The buffer filled up before the closing quote was seen: truncate so
	 * the contents stay terminated.
	 * NOTE(review): no error code is set on this path (unchanged) -- consider
	 * adding one.
	 */
	self->string[FINSH_STRING_MAX - 1] = '\0';

	return NULL;
}

/*
 * Parse one escape sequence inside a string literal; the backslash has
 * already been consumed. Returns the value of the escaped character.
 *
 * Supported: \n \t \v \b \r \f \a \" \\ \' , \x or \X followed by hex
 * digits, and a backslash followed by octal digits. Any other escape
 * returns 0.
 *
 * \\ and \' used to fall into the default branch and produce 0, which put a
 * NUL byte into the middle of the string.
 */
static int token_proc_escape(struct finsh_token* self)
{
	char ch;
	int result = 0;

	ch = token_next_char(self);

	switch(ch) {
		case 'n':
			result = '\n';
			break;

		case 't':
			result = '\t';
			break;

		case 'v':
			result = '\v';
			break;

		case 'b':
			result = '\b';
			break;

		case 'r':
			result = '\r';
			break;

		case 'f':
			result = '\f';
			break;

		case 'a':
			result = '\007';
			break;

		case '"':
			result = '"';
			break;

		case '\\':
			result = '\\';
			break;

		case '\'':
			result = '\'';
			break;

		case 'x':
		case 'X':
			result = 0;
			ch  = token_next_char(self);

			while(is_xdigit(ch)) {
				/* characters below 'A' are decimal digits here; letters are folded to lower case */
				result = result * 16 + ((ch < 'A') ? (ch - '0') : (ch | 0x20) - 'a' + 10);
				ch = token_next_char(self);
			}

			/* un-read the first character after the hex digits */
			token_prev_char(self);
			break;

		default:
			/* the unsigned compare is true only for '0'..'7' */
			if((ch - '0') < 8u) {
				result = 0;

				while((ch - '0') < 8u) {
					result = result * 8 + ch - '0';
					ch = token_next_char(self);
				}

				/* un-read the first character after the octal digits */
				token_prev_char(self);
			}

			break;
	}

	return result;
}

/*
 * Scan an integer literal of the form
 *     (0|0x|0X|0b|0B)number+(l|L)
 * and store it in self->value.int_value.
 *
 * A leading "0x"/"0X" selects hexadecimal, "0b"/"0B" binary, and "0"
 * followed by an octal digit selects octal; those are converted by
 * token_spec_number(). Anything else starting with a non-zero digit is
 * decimal. A trailing 'l' or 'L' changes the token type to
 * finsh_token_type_value_long; otherwise the character after the number is
 * pushed back.
 *
 * Hex literals accept only real hex digits. The previous is_alpha() test
 * also swallowed g..z, including the 'l'/'L' suffix. Digits beyond the
 * capacity of the local buffer are consumed but not stored.
 */
static void token_proc_number(struct finsh_token* self)
{
	char ch;
	char* p, buf[128];
	char* const limit = buf + sizeof(buf) - 1; /* keep room for the '\0' */
	long value;

	value = 0;
	p = buf;

	ch  = token_next_char(self);

	if(ch == '0') {
		int b;
		ch = token_next_char(self);

		if(ch == 'x' || ch == 'X') { /* it's a hex number */
			b = 16;
			ch = token_next_char(self);

			while(is_xdigit(ch)) {
				if(p < limit) *p++ = ch;

				ch = token_next_char(self);
			}
		} else if(ch == 'b' || ch == 'B') {
			b = 2;
			ch = token_next_char(self);

			while((ch == '0') || (ch == '1')) {
				if(p < limit) *p++ = ch;

				ch = token_next_char(self);
			}
		} else if('0' <= ch && ch <= '7') {
			b = 8;

			while('0' <= ch && ch <= '7') {
				if(p < limit) *p++ = ch;

				ch = token_next_char(self);
			}
		} else {
			token_prev_char(self);

			/* a lone 0: made as 0 value */
			self->value.int_value = 0;
			self->current_token = finsh_token_type_value_int;
			return;
		}

		*p = '\0';

		self->value.int_value = token_spec_number(buf, (int)(p - buf), b);
		self->current_token = finsh_token_type_value_int;
	} else {
		while(is_digit(ch)) {
			value = value * 10 + (ch - '0');
			ch = token_next_char(self);
		}

		self->value.int_value = value;
		self->current_token = finsh_token_type_value_int;
	}

	/* ch is the first character after the digits */
	switch(ch) {
		case 'l':
		case 'L':
			self->current_token = finsh_token_type_value_long;
			break;

		default:
			token_prev_char(self);
			break;
	}
}

/* the accumulator is BN_SIZE words of 32 bits each, i.e. a 64 bit number */
#define BN_SIZE 2

/*
 * Convert `length` characters of `string`, written in base b (16, 8 or 2),
 * to a number.
 *
 * Each digit is shifted into a multi-word accumulator, least significant
 * word first, with the bits shifted out of one word carried into the next.
 * Only the lowest word is returned. Characters are not validated: 'a'..'f'
 * and 'A'..'F' count as 10..15 and everything else as (ch - '0').
 */
static long token_spec_number(char* string, int length, int b)
{
	unsigned int words[BN_SIZE];
	unsigned int old;
	int bits;
	int digit;
	int pos, w;

	/* number of bits contributed by one digit; unknown bases use 1 */
	if(b == 16) {
		bits = 4;
	} else if(b == 8) {
		bits = 3;
	} else {
		bits = 1;
	}

	for(w = 0; w < BN_SIZE; w++) words[w] = 0;

	for(pos = 0; pos < length; pos++) {
		digit = string[pos];

		if(digit >= 'a' && digit <= 'f') {
			digit = digit - 'a' + 10;
		} else if(digit >= 'A' && digit <= 'F') {
			digit = digit - 'A' + 10;
		} else {
			digit = digit - '0';
		}

		/* shift the digit in; `digit` then carries the overflow upwards */
		for(w = 0; w < BN_SIZE; w++) {
			old = words[w];
			words[w] = (old << bits) | digit;
			digit = old >> (32 - bits);
		}
	}

	return (long)words[0];
}
